library(dplyr,quietly = TRUE,warn.conflicts = FALSE)
library(Seurat,quietly = TRUE,warn.conflicts = FALSE)
library(patchwork,quietly = TRUE,warn.conflicts = FALSE)
library(ggplot2,quietly = TRUE,warn.conflicts = FALSE)
library(harmony,quietly = TRUE,warn.conflicts = FALSE)
set.seed(2020)
rm(list=ls())
setwd('../../')
knitr::opts_knit$set(root.dir = getwd())
config <- yaml::read_yaml("config/config.yaml")
## Warning in readLines(file): incomplete final line found on 'config/config.yaml'
print(getwd())
## [1] "/Users/mareba/scCT_snakemake_final"
brain1 <- Read10X(config$input$Sox10_RNA$replicate1)
brain2 <- Read10X(config$input$Sox10_RNA$replicate2)
# Create seurat objects
brain1 <- CreateSeuratObject(counts = brain1, project = "scRNA", min.cells = 3, min.features = 200)
brain2 <- CreateSeuratObject(counts = brain2, project = "scRNA", min.cells = 3, min.features = 200)
# Add replicate information to metadata
brain1$sample <- "rep1"
brain2$sample <- "rep2"
# Merge the seurat objects
brain <- merge(brain1,brain2)
## Warning in CheckDuplicateCellNames(object.list = objects): Some cell names are
## duplicated across objects provided. Renaming to enforce unique cell names.
rm(list=c('brain1','brain2'))
# Remove GFP from count matrix
GFP <- brain[['RNA']]@counts['GFP',]
brain <- brain[-grep(pattern = 'GFP',x = rownames(brain)),]
# Add GFP to object metadata
brain$GFP_counts <- GFP[rownames(brain@meta.data)]
brain$GFP_logcounts <- log(brain$GFP_counts + 1,10)
# Look at mitochondrial genes ratios
brain[["percent.mt"]] <- PercentageFeatureSet(brain, pattern = "^mt-")
VlnPlot(brain, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3,pt.size = 0.1)

plot1 <- FeatureScatter(brain, feature1 = "nCount_RNA", feature2 = "percent.mt",pt.size = 0.2)
plot2 <- FeatureScatter(brain, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",pt.size=0.2)
plot1 + plot2

brain <- subset(brain, subset = nFeature_RNA > 200 & percent.mt < 15)
brain <- NormalizeData(brain, normalization.method = "LogNormalize", scale.factor = 10000)
brain <- FindVariableFeatures(brain, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(brain), 10)
plot1 <- VariableFeaturePlot(brain)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1 + plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis

all.genes <- rownames(brain)
brain <- ScaleData(brain, features = all.genes)
## Centering and scaling data matrix
brain <- RunPCA(brain, features = VariableFeatures(object = brain))
## PC_ 1
## Positive: Marcks, Id3, Slc7a11, Mef2c, Cyba, Clic1, Itgb5, Hes5, Fcgrt, Vcam1
## Ctsh, Fjx1, Slco1c1, B2m, Pdgfrb, Ier5, Lmo4, Nrp1, Ifitm3, Eva1b
## Igfbp7, Vtn, Fabp7, Rarres2, Ifitm2, Ly6e, Nr2f2, Dlc1, Col4a1, H2-D1
## Negative: Tspan2, Ugt8a, Cldn11, Fa2h, Tubb4a, Ermn, Mobp, Mog, Mag, Plp1
## Tmem151a, Mbp, Stmn4, Pllp, Tmeff2, Enpp2, Ank3, Cnp, Elavl3, Tmem88b
## Mal, Slain1, Ttll7, Qdpr, Opalin, Epb41l3, Ndrg1, Grb14, Tppp, Gjc2
## PC_ 2
## Positive: Laptm5, Cx3cr1, Csf1r, Ly86, C1qc, C1qa, Selplg, C1qb, Fcer1g, Ctss
## P2ry12, Siglech, Fcgr3, Trem2, Tyrobp, Gpr34, Cd53, Fcrls, Ltc4s, P2ry13
## Tmem119, Cyth4, Rgs10, Aif1, Cd68, Pld4, Cd300c2, Mpeg1, Fcgr1, Irf8
## Negative: Ptn, Dbi, Fjx1, Hes5, Slc7a11, Fabp7, Vcam1, S100b, Id4, Id3
## Scrg1, Rgcc, Hsd11b1, Nnat, Etnppl, Tubb2b, Rmst, Igfbp2, Mgst1, Gria1
## Pcdh7, Pfn2, Sox6, Meis2, Slco1c1, Grin3a, Tubb4b, Atp1b1, Hapln1, Cbr3
## PC_ 3
## Positive: Hes5, Fjx1, C1qc, Ctsd, C1qa, C1qb, Hexb, Dbi, Ctss, P2ry12
## Lair1, Cx3cr1, Trem2, Ly86, Selplg, Gpr34, Csf1r, Fcer1g, Tyrobp, Siglech
## Laptm5, Fcgr3, Fcrls, Frmd4a, Id4, P2ry13, S100b, Ptgs1, Cd53, Tubb2b
## Negative: Col4a1, Igfbp7, Col4a2, Hspb1, Itga1, Esam, Ebf1, Tm4sf1, Rbpms, Slc22a8
## Rgs5, Cald1, Arhgap29, Myo1b, Car4, Emcn, Cxcl12, Flt1, Ifitm3, Eng
## Gjc1, Ndufa4l2, Tagln2, Slc9a3r2, Higd1b, Ifitm2, Foxc1, Myl9, Adgrf5, Nid1
## PC_ 4
## Positive: Ly6c1, Ly6a, Ptprb, Slco1a4, Egfl7, Adgrl4, Sox18, Abcb1a, Pglyrp1, Kdr
## Pecam1, Itm2a, Flt1, Rasip1, Cyyr1, Ctla2a, Slc7a1, Srarp, Icam2, Spock2
## Cdh5, Cldn5, Kank3, Lsr, Klf2, Sox17, Cd34, Fn1, Tek, Podxl
## Negative: Ndufa4l2, Slc6a20a, Cd248, Kcnj8, Pdgfrb, Rgs4, Higd1b, Slc38a11, Foxd1, Slc19a1
## Carmn, Atp13a5, Ggt5, Nbl1, Myl9, Gper1, Il34, Aspn, Cspg4, Cox4i2
## Cald1, Abcc9, Ace2, Ifitm1, Col1a2, Vtn, Art3, Rem1, P2ry14, S1pr3
## PC_ 5
## Positive: Rtn1, Opcml, Gpr17, Tnr, Tmem108, Neu4, 9630013A20Rik, Cyfip2, Map1b, Fyn
## Igsf9b, 1810041L15Rik, Vcan, Mycl, 9530059O14Rik, Bmp4, 3110035E14Rik, Slc1a1, Dynll2, 5730559C18Rik
## Ptpre, Sema3d, Marcks, 3632451O06Rik, Rnf128, Pdcd4, Ust, Enpp6, Elfn2, Ppfibp1
## Negative: Car2, Gpr37, Slco1c1, Fth1, Clmn, Mal, Gng11, Efhd1, Ndrg1, Ptprd
## Ermn, Clic4, Neat1, Mobp, Car14, Opalin, Sept4, Pdlim2, Ppp1r14a, Fjx1
## Id3, Etnppl, Slc48a1, Tnfaip6, Hsd11b1, Nkx6-2, Pde8a, Plekhh1, Prr18, Ptn
brain <- RunHarmony(brain,group.by.vars = 'sample',dims.use = 1:40)
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony 6/10
## Harmony 7/10
## Harmony 8/10
## Harmony 9/10
## Harmony converged after 9 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details
## on syntax validity
DimHeatmap(brain, dims = 1:25, cells = 500, balanced = TRUE)

ElbowPlot(brain)

brain <- FindNeighbors(brain, dims = 1:20,reduction = 'harmony')
## Computing nearest neighbor graph
## Computing SNN
brain <- FindClusters(brain, resolution = 0.02)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11628
## Number of edges: 390979
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9929
## Number of communities: 10
## Elapsed time: 0 seconds
brain <- RunUMAP(brain, dims = 1:20,reduction = 'harmony')
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:27:59 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:27:59 Read 11628 rows and found 20 numeric columns
## 15:27:59 Using Annoy for neighbor search, n_neighbors = 30
## 15:27:59 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:28:01 Writing NN index file to temp file /var/folders/zc/29jxbf153g37b_cjp6_gkxnxxh967n/T//RtmpRHMKML/file555f5a5840b7
## 15:28:01 Searching Annoy index using 1 thread, search_k = 3000
## 15:28:04 Annoy recall = 100%
## 15:28:05 Commencing smooth kNN distance calibration using 1 thread
## 15:28:05 Initializing from normalized Laplacian + noise
## 15:28:07 Commencing optimization for 200 epochs, with 508962 positive edges
## 15:28:13 Optimization finished
DimPlot(brain, reduction = "umap",label=TRUE)

#brain <- RunTSNE(brain, dims = 1:20)
#DimPlot(brain, reduction = "tsne",label=TRUE)
# new.cluster.ids <- c("Astrocytes","Oligodendrocytes","VLMCs","Microglia","")
markers <- FindAllMarkers(brain, min.pct = 0.4)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
top10 <- markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
p1 <- DoHeatmap(brain, features = top10$gene) + NoLegend()
p1

saveRDS(object = brain, file = "results/Sox10_RNA/clustering/all_cells/01.clustering.Rds")
write.csv2(markers,"results/Sox10_RNA/clustering/all_cells//markers.csv")
png("results/Sox10_RNA/clustering/all_cells/heatmap.png",width=1024,height=1024)
p1
dev.off()
## quartz_off_screen
## 2
GFP+ cells
brain.GFP <- brain[,brain$GFP_counts > 5]
brain.GFP <- NormalizeData(brain.GFP, normalization.method = "LogNormalize", scale.factor = 10000)
brain.GFP <- ScaleData(brain.GFP)
## Centering and scaling data matrix
brain.GFP <- FindVariableFeatures(brain.GFP, selection.method = "vst", nfeatures = 2000)
brain.GFP <- RunPCA(brain.GFP, features = VariableFeatures(object = brain.GFP))
## Warning in PrepDR(object = object, features = features, verbose = verbose):
## The following 210 features requested have not been scaled (running reduction
## without them): Clec4a3, Myo1g, Slc11a1, Psmb9, Fam167b, C5ar1, Gm28729, Epsti1,
## Gabrg2, Srrm3, Haspin, Armc3, H2-K1, Clic6, Uncx, Sla, Cfap221, Gm10800,
## Htr2c, Cd300ld, Stat1, Lrrc3, Zcchc18, Hlx, Ifi209, Bcl6b, Vstm2l, AY036118,
## Vamp5, She, Slfn9, Chd3, Cnksr2, Aqp5, Ldha, Cspg5, Fat2, Cplx1, Zfp985, Sulf1,
## Irgm1, Fermt3, Prdx2, Fibin, Gm5127, Mal2, Adamts12, Ikzf1, Camsap2, Gda,
## Hcst, Adgrl2, AU020206, Myo1f, Tmsb4x, Bmp7, Wwtr1, Rasd1, Eno2, Igsf1, Lrrn3,
## Igtp, Slc2a1, Rgs8, Gcsam, Birc2, Scube3, Fxyd7, Ccdc65, Ago3, Shcbp1, Tgfbr2,
## Ecm2, Cdo1, Grm1, Chd3os, Kcnk2, Dynll1, Syt7, Smim5, Aprt, Tbx3, Vstm4, Rmi2,
## Zdhhc20, Cmtm8, Mllt11, Tshz2, Pde5a, 2210011C24Rik, Asb2, Kif1bp, Tmem132b,
## Crocc, Gm12326, Utrn, S1pr4, Gpr50, Podxl2, Il10ra, Crip2, Apobec2, Rhbdl2,
## 2700046A07Rik, Elmo1, Bag1, Mfsd2a, Qpct, Spag5, Nexn, Foxf2, Kdelr3, Ggt1,
## Cav2, Barhl1, Cnih2, Gm21188, Htra3, Cdh11, Fstl5, Pax7, Scrt2, Lilra5, Abca9,
## Syt2, Sapcd1, Fam19a2, Arhgap31, Abcg1, Tgfb1i1, Rffl, Rictor, Aldh1a1, Gng8,
## Snhg18, Tpd52, Fgd5, Lurap1l, Adamts1, Rnf13, Angpt2, Tfpi, 5330434G04Rik,
## Amz1, Gadd45a, Dab1, Arvcf, Unc13c, Pik3c2b, Lcat, Dock9, Pdlim3, Prr15, Six2,
## Grik3, Cd55, F5, Ptprg, Pcsk1n, Anpep, B3gat2, Gm42912, Tcte1, Neurl3, Xaf1,
## Col9a2, Trim30a, Cd300a, Dclk3, Nkd1, Bbox1, Chst8, Odc1, Ptpn6, Srpk1, Patj,
## Gfra2, Reep5, Gm12688, Nos1, Rhob, Ivns1abp, Reep1, Fbxo32, Syne2, Tacc3, Spry1,
## Fbln1, Slc16a3, Crispld2, Sap30bp, Hic1, Prrx2, Npy1r, Tspan4, Hist1h1a, Kcnma1,
## Ppp1r3g, Bex3, Mtap, Atp10a, Fank1, Ifi44, Homer2, Lamb2, Pantr1, Gde1, Tgfa,
## Kcnj12, Fam43a
## PC_ 1
## Positive: Ugt8a, Fa2h, Tspan2, Mag, Cldn11, Mog, Tmem151a, Ermn, Tubb4a, Pllp
## Stmn4, Mobp, Ank3, Elavl3, Tmeff2, Plp1, Tmem88b, Mbp, Slain1, Cnp
## Tppp, Mal, Enpp2, Lgi3, Ttll7, Gjc2, Tmeff1, Epb41l3, Evi2a, Bin1
## Negative: Id3, Slc7a11, Vcam1, Fjx1, Pdgfrb, Rarres2, Marcks, Slco1c1, Hes5, Lmo4
## Nr2f2, Fcgrt, Rgcc, Eva1b, Dlc1, Ifitm2, Epas1, Itih5, Ifitm3, Igfbp2
## Bsg, Id4, Fabp7, Col4a1, Vtn, Igfbp7, Sod3, Tubb2b, Nrp1, Mgst1
## PC_ 2
## Positive: Dbi, Fjx1, Hes5, S100b, Id4, Scrg1, Tubb2b, Cadm1, Pcdh7, Hsd11b1
## Rmst, Etnppl, Vcam1, Fabp7, Pfn2, Meis2, Slc7a11, Gria1, Frmd4a, Tcf7l2
## Hapln1, Sox6, Nnat, Cbr3, 2900052N01Rik, Rgcc, Il33, Lhx2, Slco1c1, Lmo4
## Negative: Col4a1, Igfbp7, Col4a2, Cald1, Hspb1, Itga1, Ebf1, Rbpms, Myo1b, Esam
## Tm4sf1, Rgs5, Eng, Arhgap29, Ifitm3, Tagln2, Ifitm2, Slc22a8, Ndufa4l2, Myl9
## Car4, Cxcl12, Gjc1, Lamc1, Nid1, Emcn, Higd1b, S100a11, Eva1b, Slc9a3r2
## PC_ 3
## Positive: Slc1a1, Tnr, Opcml, Gpr17, Neu4, Rtn1, Tmem108, Mycl, Ptpre, 9630013A20Rik
## Cyfip2, 1810041L15Rik, Igsf9b, 9530059O14Rik, Vcan, Fyn, 5730559C18Rik, 3110035E14Rik, Gm38039, Mpzl1
## Dynll2, Sema3d, Rnf128, Lims2, Ptpro, Sgk1, Ppfibp1, 4430402I18Rik, Map1b, Sh3bp4
## Negative: Ptn, Car2, Sept4, Gng11, Gpr37, Pdgfrb, Ppp1r14a, Pdlim2, Clmn, Atp13a5
## Ptprd, Notch3, Daam1, Sod3, Efhd1, Rgs4, Rarres2, Fth1, Igsf8, Ndufa4l2
## Kcnj8, Slc38a11, Higd1b, Slco3a1, Prr5l, Gper1, Cox4i2, Pth1r, Aspn, Ermn
## PC_ 4
## Positive: Adgrl4, Ly6c1, Ly6a, Sox18, Ptprb, Pecam1, Egfl7, Kdr, Abcb1a, Slco1a4
## Rasip1, Icam2, Cd34, Ly6e, Pglyrp1, Cyyr1, Ctla2a, Podxl, Klf2, Srgn
## Sox17, Tek, Kank3, Lsr, Cdh5, Klf4, Ushbp1, Gimap1, AU021092, Mecom
## Negative: Cald1, Cspg4, Cd248, Slc19a1, Ndufa4l2, Slc6a20a, Kcnj8, Slc38a11, Rgs4, Il34
## Bgn, Nbl1, Pdgfrb, Ggt5, Carmn, Higd1b, Vtn, Myl9, Zic1, Foxd1
## Bmp4, Gper1, Aspn, P2ry14, Cox4i2, Ifitm1, Atp13a5, S1pr3, Abcc9, Ace2
## PC_ 5
## Positive: Igfbp4, Bmp6, Serpinf1, Kctd12, Apod, Mest, Frzb, Fbln2, Col23a1, Igf1
## Npy, Foxd3, Wnt6, Olfml3, Cemip, Col11a1, Cyp1b1, Dcn, Hpgd, Fxyd6
## Fndc1, Fmo1, Shisa2, Sgcd, Col12a1, Lum, Col18a1, Col5a2, Abca8a, Lpcat2
## Negative: Rgs5, Esam, Car4, Higd1b, Itpr2, Tubb2b, Rgs4, Atp13a5, Notch3, Ebf1
## Cox4i2, Slc38a11, Ndufa4l2, Cspg4, Ace2, Art3, Tm4sf1, Kcnj8, Aspn, Tbxa2r
## Gjc1, Slco1c1, Abcc9, Cadm1, Atp2a3, Flt1, Gper1, Ddit4l, Pth1r, Ackr3
brain.GFP <- RunUMAP(brain.GFP, dims = 1:8,reduction = 'pca')
## 15:38:28 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:38:28 Read 5488 rows and found 8 numeric columns
## 15:38:28 Using Annoy for neighbor search, n_neighbors = 30
## 15:38:28 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:38:30 Writing NN index file to temp file /var/folders/zc/29jxbf153g37b_cjp6_gkxnxxh967n/T//RtmpRHMKML/file555f5655f8ba
## 15:38:30 Searching Annoy index using 1 thread, search_k = 3000
## 15:38:32 Annoy recall = 100%
## 15:38:32 Commencing smooth kNN distance calibration using 1 thread
## 15:38:33 Initializing from normalized Laplacian + noise
## 15:38:33 Commencing optimization for 500 epochs, with 225534 positive edges
## 15:38:39 Optimization finished
brain.GFP <- FindNeighbors(brain.GFP, dims = 1:40,reduction = 'pca')
## Computing nearest neighbor graph
## Computing SNN
brain.GFP <- FindClusters(brain.GFP, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5488
## Number of edges: 223338
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9734
## Number of communities: 10
## Elapsed time: 0 seconds
DimPlot(brain.GFP, reduction = "umap",label=TRUE)

new.cluster.ids <- c("Astrocytes","Oligodendrocytes","Astrocytes","OEC","Pericytes","COP-NFOL","VEC","VLMC","OPC","VLMC")
names(new.cluster.ids) <-levels(brain.GFP@active.ident)
brain.GFP <- RenameIdents(brain.GFP,new.cluster.ids)
brain.GFP@active.ident <- factor(brain.GFP@active.ident,levels = levels(brain.GFP@active.ident)[c(1,3,9,6,2,4,8,10,7,5)])
DimPlot(brain.GFP,group.by = 'sample',pt.size=0.2)

DimPlot(brain.GFP,label=TRUE,pt.size=0.2)

markers.GFP <- FindAllMarkers(brain.GFP, min.pct = 0.2)
## Calculating cluster Astrocytes
## Calculating cluster OEC
## Calculating cluster VEC
## Calculating cluster Oligodendrocytes
## Calculating cluster Pericytes
## Calculating cluster OPC
## Calculating cluster VLMC
## Calculating cluster COP-NFOL
top10 <- markers.GFP %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
p1 <- DoHeatmap(brain.GFP, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(brain.GFP, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the RNA assay:
## Cldn10, Mt1, Ntsr2, Slc7a10, Gm3764, Aqp4, Slc6a11, Mt3, Slc1a2, Clu
p1

VlnPlot(brain.GFP,features=c("Sox10","Olig2","Pdgfra","Sox9"),pt.size=0,ncol = 2)

VlnPlot(brain.GFP,features=c("Mbp","Mog","C1ql1","Neu4"),pt.size=0,ncol = 2)

VlnPlot(brain.GFP,features=c("Aqp4","Slc1a2","Sox9","Rfx4"),pt.size=0,ncol = 2)

brain.GFP$cell_type <- brain.GFP@active.ident
saveRDS(object = brain.GFP, file = "results/Sox10_RNA/clustering/GFP/01.clustering.Rds")
write.csv2(markers.GFP,"results/Sox10_RNA/clustering/GFP/markers.csv")
png("results/Sox10_RNA/clustering/GFP/heatmap.png",width=1024,height=1024)
p1
dev.off()
## quartz_off_screen
## 2